knitr::opts_chunk$set(
echo = TRUE,
warning = FALSE,
message = FALSE)
knitr::opts_knit$set(root.dir = '~/Documents/NCBA/species/')
This demo requires the tidyverse packages.
library(tidyverse)
Load the NCBA functions because this function relies upon the output from some of them.
setwd("~/Code/NCBA/resources")
source("ncba_functions.R")
config <- "~/Documents/NCBA/Scripts/ncba_config.R"
This document details a function that generates a boxplot of breeding codes with some customization options and supports the ability to view the checklist behind individual data points by clicking on them in the figure. The function offers several options as parameters.
species – common name of the species data – data frame of ebird or NCBA data type – whether to create an interactive plot that supports opening checklist URLs by clicking on data points in the figure, a non-interactive plot, or a non-interactive plot separated by ecoregion. pallet – specify an RColorBrewer pallet (multiple colors), or a single color (name or hex) for the figure. omit_codes – specify evidence codes not be plotted. lump – an option to lump breeding codes into fewer categories. drop – TRUE or FALSE whether to include unreported codes in the plot
breeding_boxplot
## function (species, data, type = "interactive", pallet = "Paired",
## omit_codes = NULL, lump = NULL, drop = TRUE, cex.x.axis = 0.9,
## cex.y.axis = 0.8, subtitle = NULL)
## {
## library(lubridate)
## library(grid)
## library(gridBase)
## library(RColorBrewer)
## library(ggiraph)
## library(ggplot2)
## ebird <- data
## ebird["breeding_code"][ebird["breeding_code"] == ""] <- "NULL"
## ebird$observation_date <- sub("^20\\d\\d", "2050", ebird$observation_date)
## ebird$breeding_code <- trimws(ebird$breeding_code)
## ebird$obsdate <- as.Date(ebird$observation_date, "%Y-%m-%d")
## if (is.null(lump) == FALSE) {
## drop <- TRUE
## }
## if (is.null(omit_codes) == FALSE) {
## drop <- TRUE
## }
## codelevels <- c("H", "S", "S7", "M", "T", "P", "C", "B",
## "CN", "NB", "A", "N", "DD", "ON", "NE", "FS", "CF", "NY",
## "FY", "FL", "PE", "UN", "F", "O", "NC", "NULL")
## if (is.null("lump") == FALSE) {
## codelevels <- c(codelevels, names(lump)[!names(lump) %in%
## codelevels])
## }
## if (!all(ebird$breeding_code %in% codelevels)) {
## warn <- paste("Not all eBird codes (breeding_code) for",
## species, "are in codelevels")
## warning(warn)
## }
## if (drop == FALSE) {
## missing_codes <- codelevels[!codelevels %in% ebird$breeding_code]
## missing <- data.frame(matrix(ncol = ncol(ebird), nrow = length(missing_codes)))
## names(missing) <- names(ebird)
## missing$breeding_code <- missing_codes
## ebird <- rbind(ebird, missing)
## ebird <- ebird %>% mutate(breeding_code = factor(ebird$breeding_code,
## levels = codelevels, ordered = TRUE))
## }
## if (drop == TRUE) {
## if (is.null("omit_codes") == FALSE) {
## ebird <- ebird[!ebird$breeding_code %in% omit_codes,
## ]
## }
## for (i in seq_along(lump)) {
## indx <- ebird$breeding_code %in% lump[[i]]
## ebird[indx, "breeding_code"] <- names(lump)[i]
## }
## codelevels <- codelevels[codelevels %in% ebird$breeding_code]
## if (is.null("omit_codes") == FALSE) {
## codelevels <- codelevels[!codelevels %in% omit_codes]
## }
## ebird <- ebird %>% mutate(breeding_code = factor(ebird$breeding_code,
## levels = codelevels, ordered = TRUE))
## }
## if (pallet %in% rownames(brewer.pal.info)) {
## n <- brewer.pal.info[pallet, "maxcolors"]
## codecolors <- colorRampPalette(brewer.pal(n, pallet))(length(codelevels))
## }
## else {
## codecolors <- rep(pallet, length(codelevels))
## }
## names(codecolors) <- codelevels
## ebird$col <- codecolors[ebird$breeding_code]
## if (type == "non-interactive") {
## boxplot(obsdate ~ breeding_code, horizontal = TRUE, cex.axis = cex.y.axis,
## xaxt = "n", data = ebird, border = "white", main = species,
## las = 2, xlab = "Calendar Day", ylab = "Breeding Code",
## show.names = TRUE, na.action = na.pass)
## have_dates <- subset(ebird, is.na(observation_date) ==
## FALSE)
## date0 <- round_date(min(have_dates$obsdate), "month")
## date1 <- round_date(max(have_dates$obsdate), "month")
## labels <- seq(from = date0, to = date1, by = "month")
## if (length(unique(month(have_dates$obsdate))) == 1) {
## labels <- c(min(have_dates$obsdate), max(have_dates$obsdate))
## labels <- unique(labels)
## }
## else {
## if (nrow(have_dates) > 1 && length(labels) == 1) {
## labels <- unique(c(min(have_dates$obsdate), max(have_dates$obsdate)))
## }
## }
## names(labels) <- format(labels, "%b %d")
## vps <- baseViewports()
## pushViewport(vps$inner, vps$figure, vps$plot)
## grid.text(names(labels), x = unit(labels, "native"),
## y = unit(-0.7, "lines"), just = "right", rot = 65,
## gp = gpar(cex = cex.x.axis))
## popViewport(3)
## axis(1, labels, labels = FALSE)
## col <- unique(ebird$col)
## stripchart(obsdate ~ breeding_code, data = ebird, vertical = FALSE,
## method = "jitter", pch = 16, col = col, add = TRUE,
## na.action = na.pass)
## boxplot(obsdate ~ breeding_code, horizontal = TRUE, col = "#F5F5F500",
## yaxt = "n", xaxt = "n", data = ebird, add = TRUE,
## na.action = na.pass)
## }
## if (type == "ecoregional") {
## fields <- c("ID_BLOCK", "ID_EBD_NAME", "ECOREGION", "COUNTY",
## "ID_WEB_BLOCKMAP")
## blocks <- get_blocks(ncba_config = config, spatial = FALSE,
## fields = fields, crs = 4326)
## records2 <- left_join(ebird, blocks, by = c(ncba_block = "ID_EBD_NAME")) %>%
## filter(is.na(ECOREGION) == FALSE)
## records2$ECOREGION[records2$ECOREGION == "CP"] <- "Coastal Plain"
## records2$ECOREGION[records2$ECOREGION == "P"] <- "Piedmont"
## records2$ECOREGION[records2$ECOREGION == "M"] <- "Mountains"
## records2$ECOREGION <- factor(records2$ECOREGION, levels = c("Coastal Plain",
## "Piedmont", "Mountains"))
## result <- ggplot(data = records2) + geom_boxplot(aes(x = obsdate,
## y = breeding_code)) + facet_wrap(~ECOREGION, nrow = 3) +
## labs(y = "Breeding Code", x = "Calendar Day")
## plot(result)
## }
## if (type == "interactive") {
## ebird$front <- "https://ebird.org/checklist/"
## ebird$ChecklistLink <- with(ebird, paste0(front, sampling_event_identifier))
## gg_point = ggplot(data = ebird) + labs(y = "Breeding Code",
## x = "Calendar Day") + geom_boxplot(aes(x = obsdate,
## y = breeding_code)) + geom_point_interactive(aes(x = obsdate,
## y = breeding_code, color = col, tooltip = obsdate,
## data_id = obsdate, onclick = paste0("window.open(\"",
## ChecklistLink, "\", \"_blank\")")), show.legend = FALSE,
## position = position_jitter(width = 0.2, height = 0.2)) +
## theme_minimal() + labs(title = species)
## girafe(ggobj = gg_point, width_svg = 10, options = list(opts_sizing(rescale = TRUE)))
## }
## }
Identify a species to investigate.
species <- "Loggerhead Shrike"
print(species)
## [1] "Loggerhead Shrike"
Retrieve the records for the species from the Atlas Cache
time1 <- proc.time()
nc_data <- get_observations(species, NCBA_only = TRUE, EBD_fields_only = FALSE,
ncba_config = config)
# format columns to the standard analysis format (ebd format)
records <- to_EBD_format(nc_data, drop=FALSE)
# Calculate processing time
mongotime <- proc.time() - time1
# Print number of records returned
print(paste("Records returned:", nrow(records)))
## [1] "Records returned: 1304"
print(paste("Runtime: ", mongotime[["elapsed"]]))
## [1] "Runtime: 6.239"
Make a non-interactive boxplot without lumping codes together
breeding_boxplot(species, records, type = "non-interactive")
Make an interactive boxplot without lumping codes together
breeding_boxplot(species, records)
Create an interactive boxplot with some of the codes lumped together.
lump <- list(S = c("S", "S7", "M"), O = c("NULL", "F", "O", "NC"))
breeding_boxplot(species, records, lump=lump)
Create an interactive boxplot that omits some codes and uses a different colormap.
omit <- c("S7", "M", "NC", "H", "T", "P", "C", "CN", "N", "A", "NB",
"FS")
breeding_boxplot(species, records, type = "interactive", pallet="Set3",
omit_codes=omit, lump=NULL, drop=TRUE)
Make a figure that has a boxplot for each ecoregion.
breeding_boxplot(species = species, data = records, type = "ecoregional",
lump = breeding_codes(lumped = TRUE))
Fully programmed tests are not feasible because the function returns a figure, but we can summarize the aspects of the dataset behind the figure and visually compare them to what the figure shows. From the cells above, the input data frame is named “records”.
lump <- list(X = c("S", "S7", "M", "H", "NULL", "P", "F", "UN", "PE", "NY"),
Z = c("A", "N", "CN", "T", "NB","FS", "FL", "CF", "FY", "C",
"ON", "NE", "B"))
breeding_boxplot(species, records, type = "non-interactive", lump=lump)
breeding_boxplot(species, records, type = "interactive", lump=lump)
breeding_boxplot(species, records, type = "ecoregional", lump=lump)
A standard lumping list is available for the primary categories.
lumped_codes <- breeding_codes()
breeding_boxplot(species, records, type = "interactive", lump=lumped_codes)
omit <- c("S", "M", "H", "NULL", "P", "A", "N", "CN", "T", "NB","FS", "CF", "C")
breeding_boxplot(species, records, type = "non-interactive", omit_codes = omit)
breeding_boxplot(species, records, type = "interactive", omit_codes = omit)
breeding_boxplot(species, records, type = "ecoregional", omit_codes = omit)
breeding_boxplot(species, records, pallet="Spectral")
breeding_boxplot(species, records, pallet="Paired")
breeding_boxplot(species, records, type="non-interactive", pallet="Spectral")
breeding_boxplot(species, records, type="non-interactive", pallet="Paired")
breeding_boxplot(species, records, type="non-interactive", drop=FALSE)
breeding_boxplot(species, records, drop=FALSE)
breeding_boxplot(species, records, type="ecoregional", drop=FALSE)
# Print the unique breeding codes in records and the figure
print(unique(records$breeding_code))
## [1] "" "P" "S" "ON" "H" "CF" "FY" "CN" "FL" "N" "PE" "S7" "A" "NB" "F"
## [16] "NY" "T"
breeding_boxplot(species, records, type="interactive", drop=TRUE)
breeding_boxplot(species, records, type="non-interactive", drop=TRUE)
breeding_boxplot(species, records, type="ecoregional", drop=TRUE)
Run the function several times to get descriptive statistics.
# Run the function 10 times and record the runtime
time <- c()
for (i in 1:10) {
time1 <- proc.time()
breeding_boxplot(species, records, type = "non-interactive")
t <- proc.time() - time1
time[i] <- t["elapsed"]
}
# Print the descriptive statistics
print(summary(time))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0200 0.0320 0.0320 0.0316 0.0330 0.0360
Results of the speed test were…
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0220 0.0240 0.0250 0.0261 0.0260 0.0470